% Descriptive Statists
clear all;
small = 1.0e-10;

% -- File Directories   
figdir = 'fig/';
outdir = 'out/';
matdir = 'mat/';

% Load matlab files with data
fstr = [matdir 'calvec']; load(fstr);
fstr = [matdir 'country']; load(fstr);
fstr = [matdir 'country_code']; load(fstr);
fstr = [matdir 'yp']; load(fstr);
fstr = [matdir 'pop']; load(fstr);
% Update data to eliminate singletons (single value surrounded by missing
% values
yp = delete_singleton(yp);
n_t = size(calvec,1);
n_c = size(yp,2);
calvec = (1900:2017)';

% Compute growth rates
dyp = 100*dif(log(yp),1);
vec_dyp = reshape(dyp,[],1);
ii = isnan(vec_dyp);
mm = median(vec_dyp(ii==0));
fprintf('Median growth rate %5.3f \n',mm);

% Compute OECD Average and append to list of countries
% Load Lists
load([matdir 'oecd_list']);
i_oecd = zeros(n_c,1);
nn = size(oecd_list,1);
for j = 1:nn;
    ss = char(oecd_list(j));
    jj = colnumber(ss,country_code);
    if jj > 0;
      i_oecd(jj) = 1;
    end;
end;
%
% Compute OECD Average 
%
yp_oecd = yp(:,i_oecd'==1);
pop_oecd = pop(:,i_oecd'==1);
y_oecd = yp_oecd.*pop_oecd;
yp_oecd_all = NaN(n_t,1);
for t = 1:n_t;
    y = sum(packr(y_oecd(t,:)'));
    p = sum(packr(pop_oecd(t,:)'));
    yp_oecd_all(t) = y/p;
end;
yp = [yp yp_oecd_all];

yp_noecd = yp(:,i_oecd'==0);
pop_noecd = pop(:,i_oecd'==0);
y_noecd = yp_noecd.*pop_noecd;
yp_noecd_all = NaN(n_t,1);
for t = 1:n_t;
    y = sum(packr(y_noecd(t,:)'));
    p = sum(packr(pop_noecd(t,:)'));
    yp_noecd_all(t) = y/p;
end;

country_code{end+1} = 'OECD';
country{end+1}='OECD Countries';
n_c = n_c+1;

% Step 1:  Construct Low-frequency Trends
% Low Frequency Transformations
% Compute RW Covariance Matrix
cov_rw = rw_cov(n_t);
% Compute Regressors
trnd = ((1:1:n_t)/n_t)';
X = [trnd ones(n_t,1)];     % Trend and Constant Regressors
pX = X*inv(X'*X)*X';
S_fs = eye(n_t) - pX;
cov_fs = S_fs*cov_rw*S_fs';
q = 15;  % Note q in paper is q+1 for trend component
[evec_mat,eval]=eig(cov_fs,'vector');
A_fs = evec_mat(:,1:q);
eval_fs = eval(1:q);
Tvec = (1:1:n_t)';

% Construct LF-Transformations for each series
y_mat = log(yp);
yfit_mat = NaN(size(y_mat));
qtotal = 0;
yfit_reg = NaN(n_t,q+2,n_c);
for i = 1:n_c;
    y=y_mat(:,i);
    ii = ones(n_t,1)-isnan(y);
    S_i = eye(n_t);
    S_i = S_i(ii==1,:);
    n_i = sum(ii);
    X_i = X(ii==1,:);
    pX_i = X_i*inv(X_i'*X_i)*X_i';
    S_i = (eye(n_i)-pX_i)*S_i;
    cov_i = S_i*cov_rw*S_i';
    [evec_mat,eval]=eig(cov_i,'vector');
    % Order from largest to smallest
    [eval,kk] = sort(eval,'Descend');
    evec_mat = evec_mat(:,kk);
    jj = eval >= eval_fs(end);
    qtotal = qtotal + sum(jj);
    A_i = evec_mat(:,jj==1);
    tmp = packr(y);
    tmp_m = tmp - pX_i*tmp;
    C_i = A_i'*tmp_m;
    tmpfit = A_i*C_i + pX_i*tmp;
    yfit_mat(ii==1,i) = tmpfit;
    yfit_reg(ii==1,1:2,i) = X_i;
    yfit_reg(ii==1,3:size(A_i,2)+2,i) = A_i;
end;

% Save Trends for all Countries
fstr = [matdir 'yfit_mat'];save(fstr,'yfit_mat');

% Compute pairwise correlations
cvec = NaN(n_c*(n_c-1)/2,1);
k = 0;
for i = 1:n_c;
    for j = 2:n_c;
        a = packr(yfit_mat(:,[i j]));
        c = cor_compute(a(:,1),a(:,2));
        k = k+1;
        cvec(k) = c;
    end;
end;
fprintf('Average Pair-wise Correlation %5.2f \n',mean(cvec));


% Figure 1a:  Plot data
figure_1_a;


% Figure 1b:  Plot lf trends 
figure_1_b;

% Figure 2;
figure_2;

% Figure A1 .. plots with different groups of countries
figure_A1;


% Construct Growth Rates using countries with data available for both time
% periods
yp_all = yp(:,1:end-1);     % End is OECD average
pop_all = pop;

d_all = NaN(n_t,1);
d_oecd = NaN(n_t,1);
d_noecd = NaN(n_t,1);
d_usa = NaN(n_t,1);
[~,i_usa] = ismember('USA',country_code);
for t = 2:n_t;
    y0 = yp_all(t-1,:);
    p0= pop_all(t-1,:);
    y1 = yp_all(t,:);
    p1 = pop_all(t,:);
    jj = (isnan(y0)==0).*(isnan(p0)==0).*(isnan(y1)==0).*(isnan(p1)==0);
    yp0 = sum(y0(:,jj==1).*p0(:,jj==1),2)/(sum(p0(:,jj==1),2));
    yp1 = sum(y1(:,jj==1).*p1(:,jj==1),2)/(sum(p1(:,jj==1),2));
    d_all(t) = 100*(log(yp1/yp0));
    
    y0 = yp_all(t-1,i_oecd'==1);
    p0= pop_all(t-1,i_oecd'==1);
    y1 = yp_all(t,i_oecd'==1);
    p1 = pop_all(t,i_oecd'==1);
    jj = (isnan(y0)==0).*(isnan(p0)==0).*(isnan(y1)==0).*(isnan(p1)==0);
    yp0 = sum(y0(:,jj==1).*p0(:,jj==1),2)/(sum(p0(:,jj==1),2));
    yp1 = sum(y1(:,jj==1).*p1(:,jj==1),2)/(sum(p1(:,jj==1),2));
    d_oecd(t) = 100*(log(yp1/yp0));
    
    y0 = yp_all(t-1,i_oecd'==0);
    p0= pop_all(t-1,i_oecd'==0);
    y1 = yp_all(t,i_oecd'==0);
    p1 = pop_all(t,i_oecd'==0);
    jj = (isnan(y0)==0).*(isnan(p0)==0).*(isnan(y1)==0).*(isnan(p1)==0);
    yp0 = sum(y0(:,jj==1).*p0(:,jj==1),2)/(sum(p0(:,jj==1),2));
    yp1 = sum(y1(:,jj==1).*p1(:,jj==1),2)/(sum(p1(:,jj==1),2));
    d_noecd(t) = 100*(log(yp1/yp0));
    
    y0 = yp_all(t-1,i_usa);
    p0= pop_all(t-1,i_usa);
    y1 = yp_all(t,i_usa);
    p1 = pop_all(t,i_usa);
    jj = (isnan(y0)==0).*(isnan(p0)==0).*(isnan(y1)==0).*(isnan(p1)==0);
    yp0 = sum(y0(:,jj==1).*p0(:,jj==1),2)/(sum(p0(:,jj==1),2));
    yp1 = sum(y1(:,jj==1).*p1(:,jj==1),2)/(sum(p1(:,jj==1),2));
    d_usa(t) = 100*(log(yp1/yp0));
end;

tmp = packr([d_usa d_oecd d_noecd d_all]);

% Table 1_a results
outfile_name = [outdir 'Table_1_a.csv'];
fileID = fopen(outfile_name,'w');
fprintf(fileID,'Countries, 1-30, 31-60, 61-90, 91-118 \n');
fprintf(fileID,'US,');
j = 1;
a = [mean(tmp(1:30,j)) mean(tmp(31:60,j)) mean(tmp(61:90,j)) mean(tmp(91:117,j))];
prtmat_comma(a,fileID,'%5.1f','\n');

fprintf(fileID,'OECD,');
j = 2;
a = [mean(tmp(1:30,j)) mean(tmp(31:60,j)) mean(tmp(61:90,j)) mean(tmp(91:117,j))];
prtmat_comma(a,fileID,'%5.1f','\n');

fprintf(fileID,'Non-OECD,');
j = 3;
a = [mean(tmp(1:30,j)) mean(tmp(31:60,j)) mean(tmp(61:90,j)) mean(tmp(91:117,j))];
prtmat_comma(a,fileID,'%5.1f','\n');

fprintf(fileID,'All,');
j = 4;
a = [mean(tmp(1:30,j)) mean(tmp(31:60,j)) mean(tmp(61:90,j)) mean(tmp(91:117,j))];
prtmat_comma(a,fileID,'%5.1f','\n');


% Table 1_b results
pct_vec = [0.10 0.25 0.50 0.75 0.90];
outfile_name = [outdir 'Table_1_b.csv'];
fileID = fopen(outfile_name,'w');
fprintf(fileID,'date, median, s, 75-25, 90-10 \n');
fprintf(fileID,'1950 - 1954 ,');
ii = (calvec >= 1950) .* (calvec <= 1954);
tmp = log(yp(ii==1,1:end-1));
m = mean(tmp,1)';
m = packr(m);
a = pctile(m,pct_vec);
s = std(m);
b = [a(3) s a(4)-a(2) a(5)-a(1)];
prtmat_comma(b,fileID,'%5.1f','\n');

fprintf(fileID,'1971 - 1975 ,');
ii = (calvec >= 1971) .* (calvec <= 1975);
tmp = log(yp(ii==1,1:end-1));
m = mean(tmp,1)';
m = packr(m);
a = pctile(m,pct_vec);
s = std(m);
b = [a(3) s a(4)-a(2) a(5)-a(1)];
prtmat_comma(b,fileID,'%5.1f','\n');

fprintf(fileID,'1992 - 1996 ,');
ii = (calvec >= 1992) .* (calvec <= 1996);
tmp = log(yp(ii==1,1:end-1));
m = mean(tmp,1)';
m = packr(m);
a = pctile(m,pct_vec);
s = std(m);
b = [a(3) s a(4)-a(2) a(5)-a(1)];
prtmat_comma(b,fileID,'%5.1f','\n');

fprintf(fileID,'2013 - 2017 ,');
ii = (calvec >= 2013) .* (calvec <= 2017);
tmp = log(yp(ii==1,1:end-1));
m = mean(tmp,1)';
m = packr(m);
a = pctile(m,pct_vec);
s = std(m);
b = [a(3) s a(4)-a(2) a(5)-a(1)];
prtmat_comma(b,fileID,'%5.1f','\n');


% --- Table 1(c) Results
ii_1 = (calvec >= 1960).*(calvec<=1988);
ii_2 = calvec>=1989;

y1 = yp(ii_1==1,1:end-1);
y2 = yp(ii_2==1,1:end-1);
t1 = mean(y1)';
t2 = mean(y2)';
[s1,jj1] = sort(t1);
[s2,jj2] = sort(t2);

k1 = NaN(113,1);
k2 = NaN(113,1);  % Note: lowest group has 29 countries; all other have 28
k1(jj1(1:29)) = 1;
k1(jj1(30:57)) = 2;
k1(jj1(58:85)) = 3;
k1(jj1(86:113)) = 4;

k2(jj2(1:29)) = 1;
k2(jj2(30:57)) = 2;
k2(jj2(58:85)) = 3;
k2(jj2(86:113)) = 4;

tab_q = zeros(4,4);
for i = 1:4;
    tmp = k2(k1==i);
    ni = 29;
    if i > 1;
        ni = 28;
    end;
    for j = 1:4;
        tab_q(i,j) = sum((tmp==j))/ni;
    end;
end;


outfile_name = [outdir 'Table_1_c.csv'];
fileID = fopen(outfile_name,'w');
fprintf(fileID,'transition \n');
prtmat_comma(tab_q,fileID,'%5.2f','\n');

fprintf(fileID,'\n\n');
fprintf(fileID,'Start in Highest Quartile 1: Probability of being of being in bottom 2 quartiles after: \n');
p = tab_q';
x = [1 0 0 0]';
for i = 1:50;
    x = p*x;
    fprintf(fileID,'%3i  %5.2f \n',[i x(3)+x(4)]);
end;

fprintf('\n\n');
fprintf(fileID,'Start in lowest: Probability of moving to in top 2 quartiles after: \n');
p = tab_q';
x = [0 0 0 1]';
for i = 1:50;
    x = p*x;
    fprintf(fileID,'%3i  %5.2f \n',[i x(1)+x(2)]);
end;

